package net.bwie.jtp.dws.log.utils;

import com.huaban.analysis.jieba.JiebaSegmenter;
import com.huaban.analysis.jieba.SegToken;
import com.huaban.analysis.jieba.WordDictionary;
import org.junit.Test;
import org.wltea.analyzer.core.IKSegmenter;
import org.wltea.analyzer.core.Lexeme;
import java.io.StringReader;
import java.nio.file.FileSystems;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;


/**
 * Utility methods for segmenting Chinese text into word lists, backed by two
 * different segmentation libraries: IKAnalyzer and jieba-analysis.
 *
 * <p>NOTE(review): {@link #testDemo()} is a JUnit test living in production
 * source; consider moving it to the test source tree.
 */
public class AnalyzerUtil {

    /**
     * Segments Chinese text with the IKAnalyzer tokenizer.
     *
     * @param content the text to segment
     * @return the lexeme strings in the order they appear in {@code content}
     * @throws Exception if the underlying segmenter fails while reading
     */
    public static List<String> ikAnalyzer(String content) throws Exception {
        // Program to the List interface, not the ArrayList implementation.
        List<String> words = new ArrayList<>();
        // Second argument true selects "smart" (coarse-grained) mode.
        IKSegmenter segmenter = new IKSegmenter(new StringReader(content), true);
        Lexeme lexeme;
        while ((lexeme = segmenter.next()) != null) {
            words.add(lexeme.getLexemeText());
        }
        return words;
    }

    /**
     * Demo of jieba's INDEX segmentation mode on a few tricky sentences
     * (ambiguous word boundaries, mixed Chinese/ASCII). Prints results only;
     * asserts nothing.
     */
    @Test
    public void testDemo() {
        JiebaSegmenter segmenter = new JiebaSegmenter();
        String[] sentences = new String[]{"这是一个伸手不见五指的黑夜。我叫孙悟空，我爱北京，我爱Python和C++。" +
                "", "我不喜欢日本和服。", "雷猴回归人间。", "" +
                "工信处女干事每月经过下属科室都要亲口交代24口交换机等技术性器件的安装工作", "结过婚的和尚未结过婚的"};
        for (String sentence : sentences) {
            System.out.println(segmenter.process(sentence, JiebaSegmenter.SegMode.INDEX).toString());
        }
    }

    /**
     * Segments Chinese text with jieba in SEARCH mode (fine-grained,
     * search-engine style tokens).
     *
     * @param content the text to segment
     * @return the token words in order of appearance
     */
    public static List<String> jiebaAnalyzer(String content) {
        JiebaSegmenter segmenter = new JiebaSegmenter();
        List<SegToken> tokens = segmenter.process(content, JiebaSegmenter.SegMode.SEARCH);
        // Presize: the token count is already known.
        List<String> words = new ArrayList<>(tokens.size());
        for (SegToken token : tokens) {
            words.add(token.word);
        }
        return words;
    }

    /**
     * Manual smoke test for {@link #jiebaAnalyzer(String)}.
     *
     * <p>To load a custom user dictionary before segmenting, use e.g.:
     * {@code WordDictionary.getInstance().loadUserDict(
     * FileSystems.getDefault().getPath(basePath + "/files", "dict.txt"))}.
     */
    public static void main(String[] args) throws Exception {
        List<String> words = jiebaAnalyzer("我爱你中国");
        // List.toString() yields the same "[a, b, c]" form as
        // Arrays.toString(words.toArray()), without the extra copy.
        System.out.println(words);
    }
}
